{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2d124d22-de73-436b-86cd-9b162b469be8",
   "metadata": {
    "id": "2d124d22-de73-436b-86cd-9b162b469be8",
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "%pip install --upgrade pip\n",
    "\n",
    "# Uninstall conflicting packages\n",
    "%pip uninstall -y langchain-core langchain-openai langchain-experimental langchain-community langchain chromadb beautifulsoup4 python-dotenv PyPDF2 rank_bm25\n",
    "\n",
    "# Install compatible versions of langchain-core and langchain-openai\n",
    "%pip install langchain-core==0.3.6\n",
    "%pip install langchain-openai==0.2.1\n",
    "%pip install langchain-experimental==0.3.2\n",
    "%pip install langchain-community==0.3.1\n",
    "%pip install langchain==0.3.1\n",
    "\n",
    "# Install remaining packages\n",
    "%pip install chromadb==0.5.11\n",
    "%pip install python-dotenv==1.0.1\n",
    "\n",
    "# new\n",
    "%pip install PyPDF2==3.0.1 -q --user\n",
    "%pip install rank_bm25==0.2.2\n",
    "\n",
    "# Restart the kernel after installation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "f884314f-870c-4bfb-b6c1-a5b4801ec172",
   "metadata": {
    "id": "f884314f-870c-4bfb-b6c1-a5b4801ec172"
   },
   "outputs": [],
   "source": [
    "import os\n",
    "os.environ['USER_AGENT'] = 'RAGUserAgent'\n",
    "import openai\n",
    "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
    "from langchain import hub\n",
    "from langchain_core.output_parsers import StrOutputParser\n",
    "from langchain_core.runnables import RunnablePassthrough\n",
    "import chromadb\n",
    "from langchain_community.vectorstores import Chroma\n",
    "from langchain_core.runnables import RunnableParallel\n",
    "from dotenv import load_dotenv, find_dotenv\n",
    "from langchain_core.prompts import PromptTemplate\n",
    "\n",
    "# new\n",
    "from PyPDF2 import PdfReader\n",
    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
    "from langchain.docstore.document import Document\n",
    "from langchain_community.retrievers import BM25Retriever"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "eba3468a-d7c2-4a79-8df2-c335542950f2",
   "metadata": {
    "id": "eba3468a-d7c2-4a79-8df2-c335542950f2"
   },
   "outputs": [],
   "source": [
    "# variables\n",
    "_ = load_dotenv(dotenv_path='env.txt')\n",
    "os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')\n",
    "openai.api_key = os.environ['OPENAI_API_KEY']\n",
    "embedding_function = OpenAIEmbeddings()\n",
    "llm = ChatOpenAI(model_name=\"gpt-4o-mini\", temperature=0)\n",
    "pdf_path = \"google-2023-environmental-report.pdf\"\n",
    "collection_name = \"google_environmental_report\"\n",
    "str_output_parser = StrOutputParser()\n",
    "user_query = \"What are Google's environmental initiatives?\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "d3ad428a-3eb6-40ec-a1a5-62565ead1e5b",
   "metadata": {
    "id": "d3ad428a-3eb6-40ec-a1a5-62565ead1e5b"
   },
   "outputs": [],
   "source": [
    "#### INDEXING ####"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "98ccda2c-0f4c-41c5-804d-2227cdf35aa7",
   "metadata": {
    "id": "98ccda2c-0f4c-41c5-804d-2227cdf35aa7"
   },
   "outputs": [],
   "source": [
    "# Load the PDF and extract text\n",
    "pdf_reader = PdfReader(pdf_path)\n",
    "text = \"\"\n",
    "for page in pdf_reader.pages:\n",
    "    text += page.extract_text()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "927a4c65-aa05-486c-8295-2f99673e7c20",
   "metadata": {
    "id": "927a4c65-aa05-486c-8295-2f99673e7c20"
   },
   "outputs": [],
   "source": [
    "# Split\n",
    "character_splitter = RecursiveCharacterTextSplitter(\n",
    "    separators=[\"\\n\\n\", \"\\n\", \". \", \" \", \"\"],\n",
    "    chunk_size=1000,\n",
    "    chunk_overlap=200\n",
    ")\n",
    "splits = character_splitter.split_text(text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "e75bc323-8470-4032-8cfa-e5e5f1651e2d",
   "metadata": {},
   "outputs": [],
   "source": [
    "documents = [Document(page_content=text, metadata={\"id\": str(i)}) for i, text in enumerate(splits)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "6b13568c-d633-464d-8c43-0d55f34cc8c1",
   "metadata": {
    "id": "6b13568c-d633-464d-8c43-0d55f34cc8c1"
   },
   "outputs": [],
   "source": [
    "chroma_client = chromadb.Client()\n",
    "vectorstore = Chroma.from_documents(\n",
    "    documents=documents,\n",
    "    embedding=embedding_function,\n",
    "    collection_name=collection_name,\n",
    "    client=chroma_client,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "151fcbb4-59bd-42d1-9a50-6351115eb994",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create dense retriever\n",
    "dense_retriever = vectorstore.as_retriever(search_kwargs={\"k\": 10})\n",
    "# Create sparse retriever\n",
    "sparse_retriever = BM25Retriever.from_documents(documents, k=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "dEyT0zIFGepB",
   "metadata": {
    "id": "dEyT0zIFGepB"
   },
   "outputs": [],
   "source": [
    "# Custom hybrid search function (as opposed to using LangChain EnsembleRetriever)\n",
    "def hybrid_search(query, k=10, dense_weight=0.5, sparse_weight=0.5):\n",
    "    # Step 1: Retrieve the top-k documents from both dense search and sparse search.\n",
    "    dense_docs = dense_retriever.get_relevant_documents(query)[:k]\n",
    "    dense_doc_ids = [doc.metadata['id'] for doc in dense_docs]\n",
    "    print(\"\\nCompare IDs:\")\n",
    "    print(\"dense IDs: \", dense_doc_ids)\n",
    "    sparse_docs = sparse_retriever.get_relevant_documents(query)[:k]\n",
    "    sparse_doc_ids = [doc.metadata['id'] for doc in sparse_docs]\n",
    "    print(\"sparse IDs: \", sparse_doc_ids)\n",
    "\n",
    "    # Combine the document IDs and remove duplicates\n",
    "    all_doc_ids = list(set(dense_doc_ids + sparse_doc_ids))\n",
    "\n",
    "    # Create dictionaries to store the reciprocal ranks\n",
    "    dense_reciprocal_ranks = {doc_id: 0.0 for doc_id in all_doc_ids}\n",
    "    sparse_reciprocal_ranks = {doc_id: 0.0 for doc_id in all_doc_ids}\n",
    "\n",
    "    # Step 2: Calculate the reciprocal rank for each document in dense and sparse search results.\n",
    "    for i, doc_id in enumerate(dense_doc_ids):\n",
    "        dense_reciprocal_ranks[doc_id] = 1.0 / (i + 1)\n",
    "\n",
    "    for i, doc_id in enumerate(sparse_doc_ids):\n",
    "        sparse_reciprocal_ranks[doc_id] = 1.0 / (i + 1)\n",
    "\n",
    "    # Step 3: Sum the reciprocal ranks for each document.\n",
    "    combined_reciprocal_ranks = {doc_id: 0.0 for doc_id in all_doc_ids}\n",
    "    for doc_id in all_doc_ids:\n",
    "        combined_reciprocal_ranks[doc_id] = dense_weight * dense_reciprocal_ranks[doc_id] + sparse_weight * sparse_reciprocal_ranks[doc_id]\n",
    "\n",
    "    # Step 4: Sort the documents based on their combined reciprocal rank scores.\n",
    "    sorted_doc_ids = sorted(all_doc_ids, key=lambda doc_id: combined_reciprocal_ranks[doc_id], reverse=True)\n",
    "\n",
    "    # Step 5: Retrieve the documents based on the sorted document IDs.\n",
    "    sorted_docs = []\n",
    "    all_docs = dense_docs + sparse_docs\n",
    "    for doc_id in sorted_doc_ids:\n",
    "        matching_docs = [doc for doc in all_docs if doc.metadata['id'] == doc_id]\n",
    "        if matching_docs:\n",
    "            doc = matching_docs[0]\n",
    "            doc.metadata['score'] = combined_reciprocal_ranks[doc_id]\n",
    "            doc.metadata['rank'] = sorted_doc_ids.index(doc_id) + 1\n",
    "            if len(matching_docs) > 1:\n",
    "                doc.metadata['retriever'] = 'both'\n",
    "            elif doc in dense_docs:\n",
    "                doc.metadata['retriever'] = 'dense'\n",
    "            else:\n",
    "                doc.metadata['retriever'] = 'sparse'\n",
    "            sorted_docs.append(doc)\n",
    "\n",
    "    # Step 7: Return the final ranked and sorted list, truncated by the top-k parameter\n",
    "    return sorted_docs[:k]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "6ce8df01-925b-45b5-8fb8-17b5c40c581f",
   "metadata": {
    "id": "6ce8df01-925b-45b5-8fb8-17b5c40c581f"
   },
   "outputs": [],
   "source": [
    "#### RETRIEVAL and GENERATION ####"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "fac053d8-b871-4b50-b04e-28dec9fb3b0f",
   "metadata": {
    "id": "fac053d8-b871-4b50-b04e-28dec9fb3b0f"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/conda/lib/python3.11/site-packages/langsmith/client.py:323: LangSmithMissingAPIKeyWarning: API key must be provided when using hosted LangSmith API\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "# Prompt - ignore LangSmith warning, you will not need langsmith for this coding exercise\n",
    "prompt = hub.pull(\"jclemens24/rag-prompt\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "5ef30632-13dd-4a34-af33-cb8fab94f169",
   "metadata": {
    "id": "5ef30632-13dd-4a34-af33-cb8fab94f169"
   },
   "outputs": [],
   "source": [
    "# Relevance check prompt\n",
    "relevance_prompt_template = PromptTemplate.from_template(\n",
    "    \"\"\"\n",
    "    Given the following question and retrieved context, determine if the context is relevant to the question.\n",
    "    Provide a score from 1 to 5, where 1 is not at all relevant and 5 is highly relevant.\n",
    "    Return ONLY the numeric score, without any additional text or explanation.\n",
    "\n",
    "    Question: {question}\n",
    "    Retrieved Context: {retrieved_context}\n",
    "\n",
    "    Relevance Score:\"\"\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "e8975479-b3e3-481d-ad7b-08b4eb3faaef",
   "metadata": {
    "id": "e8975479-b3e3-481d-ad7b-08b4eb3faaef"
   },
   "outputs": [],
   "source": [
    "# Post-processing\n",
    "def format_docs(docs):\n",
    "    return \"\\n\\n\".join(doc.page_content for doc in docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "fd9db713-f705-4b65-800e-2c4e3d0e4ef4",
   "metadata": {
    "id": "fd9db713-f705-4b65-800e-2c4e3d0e4ef4"
   },
   "outputs": [],
   "source": [
    "def extract_score(llm_output):\n",
    "    try:\n",
    "        score = float(llm_output.strip())\n",
    "        return score\n",
    "    except ValueError:\n",
    "        return 0\n",
    "\n",
    "# Chain it all together with LangChain\n",
    "def conditional_answer(x):\n",
    "    relevance_score = extract_score(x['relevance_score'])\n",
    "    if relevance_score < 4:\n",
    "        return \"I don't know.\"\n",
    "    else:\n",
    "        return x['answer']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "507a4b15-e13d-4fff-8093-5735683b9043",
   "metadata": {},
   "outputs": [],
   "source": [
    "rag_chain_from_docs = (\n",
    "    RunnablePassthrough.assign(context=(lambda x: format_docs(x[\"context\"])))\n",
    "    | RunnableParallel(\n",
    "        {\"relevance_score\": (\n",
    "            RunnablePassthrough()\n",
    "            | (lambda x: relevance_prompt_template.format(question=x['question'], retrieved_context=x['context']))\n",
    "            | llm\n",
    "            | str_output_parser\n",
    "        ), \"answer\": (\n",
    "            RunnablePassthrough()\n",
    "            | prompt\n",
    "            | llm\n",
    "            | str_output_parser\n",
    "        )}\n",
    "    )\n",
    "    | RunnablePassthrough().assign(final_answer=conditional_answer)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "dc5c2ab0-9191-40f7-abf2-681f1c751429",
   "metadata": {
    "id": "dc5c2ab0-9191-40f7-abf2-681f1c751429"
   },
   "outputs": [],
   "source": [
    "rag_chain_with_source = RunnableParallel(\n",
    "    {\"context\": hybrid_search, \"question\": RunnablePassthrough()}\n",
    ").assign(answer=rag_chain_from_docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "8b30177a-f9ab-45e4-812d-33b0f97325bd",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 3672,
     "status": "ok",
     "timestamp": 1715351849601,
     "user": {
      "displayName": "",
      "userId": ""
     },
     "user_tz": 240
    },
    "id": "8b30177a-f9ab-45e4-812d-33b0f97325bd",
    "outputId": "ea7aab55-11e5-4a78-fd1e-8e8a8f7e32b5",
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_17181/1271674249.py:4: LangChainDeprecationWarning: The method `BaseRetriever.get_relevant_documents` was deprecated in langchain-core 0.1.46 and will be removed in 1.0. Use :meth:`~invoke` instead.\n",
      "  dense_docs = dense_retriever.get_relevant_documents(query)[:k]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Compare IDs:\n",
      "dense IDs:  ['451', '12', '311', '344', '13', '115', '67', '346', '111', '66']\n",
      "sparse IDs:  ['150', '309', '298', '311', '328', '415', '139', '432', '91', '22']\n",
      "\n",
      "Original Question: What are Google's environmental initiatives?\n",
      "\n",
      "Relevance Score: 5\n",
      "\n",
      "Final Answer:\n",
      "Google's environmental initiatives focus on several key areas:\n",
      "\n",
      "1. **Sustainability Strategy**: Google has an updated environmental sustainability strategy organized around three pillars: empowering individuals to take action, collaborating with partners and customers, and operating the business sustainably. They aim to help 1 billion people make more sustainable choices through features in their products, such as eco-friendly routing in Google Maps and energy efficiency in Google Nest thermostats.\n",
      "\n",
      "2. **Supplier Engagement**: Google works with its suppliers to reduce energy consumption and greenhouse gas (GHG) emissions. They require suppliers to report environmental data and assess their practices to manage and reduce emissions.\n",
      "\n",
      "3. **Net-Zero Carbon Goals**: Google has committed to achieving net-zero carbon emissions and has invested in renewable energy projects, such as the Golden Hills wind farm in California.\n",
      "\n",
      "4. **Water Stewardship and Circular Economy**: Their Bay View campus is designed to be all-electric and net water-positive, incorporating principles of circular design. They also focus on managing resources efficiently and reducing waste.\n",
      "\n",
      "5. **Climate Initiatives and Partnerships**: Google is involved in various coalitions and initiatives, such as the iMasons Climate Accord for carbon reduction in digital infrastructure and the ReFED Catalytic Grant Fund to address food waste. They support projects with organizations like The Nature Conservancy for reforestation and conservation efforts.\n",
      "\n",
      "6. **Public Policy Advocacy**: Google engages in public policy discussions to promote sustainable practices and has provided input on regulations related to the repair and reuse of goods.\n",
      "\n",
      "7. **Technological Solutions**: They leverage technology to monitor environmental changes and improve sustainability practices, such as through Google Earth Engine and other data-driven tools.\n",
      "\n",
      "Overall, Google's initiatives aim to create a significant positive impact on sustainability and climate action through collaboration, innovation, and responsible operations.\n",
      "\n",
      "\n",
      "Retrieved Documents:\n",
      "Document 1: Document ID: 150 Score: 0.5 Rank: 1 Retriever: sparse\n",
      "\n",
      "Content:\n",
      "sustainability, and we’re partnering with them to develop decarbonization roadmaps and build essential data infrastructure to accurately quantify emissions and reductions across the value chain.\n",
      "We engage with our suppliers—including hardware \n",
      "manufacturing and indirect services suppliers—to help reduce their energy consumption and GHG emissions, as stated in our Supplier Code of Conduct , which all \n",
      "suppliers are required to sign. We assess suppliers’ practices to report, manage, and reduce their emissions and incorporate this into our supplier scorecard.\n",
      "Reporting  \n",
      "environmental data\n",
      "We expect all our suppliers to report environmental data,\n",
      "\n",
      "Document 2: Document ID: 451 Score: 0.5 Rank: 2 Retriever: dense\n",
      "\n",
      "Content:\n",
      "Empowering individuals:  \n",
      "A parking lot full of electric vehicles lined up outside a Google office, \n",
      "plugged into charging stations.\n",
      "Working together:  \n",
      "Satellite-derived Earth Engine image showing seasonal agricultural peaks \n",
      "near the Columbia and Snake Rivers in Washington state. The perfectly round fields are center pivot irrigated corn and wheat maturing in different months. Data source: Landsat 8, U.S. Geological Survey.\n",
      "Operating sustainably:  \n",
      "A view of our Bay View campus with the events center in the foreground \n",
      "and a Google brandmark sculpture. (Photo: Iwan Baan)\n",
      "Net-zero carbon:  \n",
      "Golden Hills wind farm in California (43 MW for Google)\n",
      "Water stewardship:  \n",
      "Our Bay View campus, as seen from across its stormwater retention pond. (Photo: Iwan Baan)\n",
      "Circular economy:  \n",
      "A closeup of many small, broken circuit boards in a pile. Our approach\n",
      "\n",
      "Document 3: Document ID: 311 Score: 0.29166666666666663 Rank: 3 Retriever: both\n",
      "\n",
      "Content:\n",
      "In 2022, we audited a subset of our suppliers to verify \n",
      "compliance for the following environmental criteria: implementation of environmental management systems, environmental permits and reporting, product content restrictions, and resource efficiency, as well as management of hazardous substances, wastewater,  solid waste, and air emissions.\n",
      "Googlers chat among indoor plants at our Pier 57 office in New York City.   79\n",
      "2023 Environmental Report  Public policy and advocacy\n",
      "We know that strong public policy action is critical to \n",
      "creating prosperous, equitable, and resilient low-carbon economies around the world. \n",
      "The United Nations Framework Convention on Climate \n",
      "Change (UNFCCC)’s 2015 Paris Agreement states that humanity must “keep global temperature rise this century well below 2°C above pre-industrial levels.”\n",
      " 143 Google\n",
      "\n",
      "Document 4: Document ID: 12 Score: 0.25 Rank: 4 Retriever: dense\n",
      "\n",
      "Content:\n",
      "The opportunity we have through our products and \n",
      "platforms is reflected in our updated environmental sustainability strategy, which focuses on where we can make the most significant positive impact. Our work is organized around three key pillars: empowering individuals to take action, working together with our partners and customers, and operating our business sustainably.\n",
      "In 2022, we reached our goal to help 1 billion people \n",
      "make more sustainable choices through our products. We achieved this by offering sustainability features like eco-friendly routing in Google Maps, energy efficiency features in Google Nest thermostats, and carbon emissions information in Google Flights. Looking ahead, our aspiration is to help individuals, cities, and other partners collectively reduce 1 gigaton of their carbon equivalent emissions annually by 2030.\n",
      " 2\n",
      "\n",
      "Document 5: Document ID: 309 Score: 0.25 Rank: 5 Retriever: sparse\n",
      "\n",
      "Content:\n",
      "that enable us to ensure that those we partner with are responsible environmental stewards. Along with having suppliers evaluate their operations, we perform our own ongoing due diligence and audits to verify compliance and to understand our supply chain’s current and potential risks.\n",
      "When we find that a supplier isn’t complying, we expect\n",
      "\n",
      "Document 6: Document ID: 298 Score: 0.16666666666666666 Rank: 6 Retriever: sparse\n",
      "\n",
      "Content:\n",
      "2023 Environmental Report  Risk management\n",
      "Our Enterprise Risk Management (ERM) team is responsible \n",
      "for identifying, assessing, and reporting risks related to the company’s operations, financial performance, and reputation. As with financial, operational, and strategic risks, the team assesses environmental risks as part of the company’s overall risk management framework. The risks and opportunities identified through this process support public disclosures and inform Google’s environmental sustainability strategy. Our Chief Sustainability Officer and sustainability teams work to address risks by identifying opportunities to reduce the company’s environmental impacts from its operations and value chain, and through improving climate resilience. \n",
      "Climate-related \n",
      "risks\n",
      "Climate-related risks and opportunities have long time\n",
      "\n",
      "Document 7: Document ID: 344 Score: 0.125 Rank: 7 Retriever: dense\n",
      "\n",
      "Content:\n",
      "iMasons Climate AccordGoogle is a founding member and part of the governing body of the iMasons Climate Accord, a coalition united on carbon reduction in digital infrastructure.\n",
      "ReFEDIn 2022, to activate industry-wide change, Google provided anchor funding to kickstart the ReFED Catalytic Grant Fund, with the goal of accelerating and scaling food waste solutions.\n",
      "The Nature Conservancy (TNC)In 2022, Google supported three of the Nature Conservancy’s watershed projects in Chile and the United States, and Google.org supported a three-phased approach to catalyze active reforestation of kelp at impactful scales. Google.org also provided a grant to TNC to develop a machine-learning-powered timber-tracing API to stop deforestation in the Amazon at scale; a team of Google engineers is working full-time for six months with TNC to develop this product as part of the Google.org Fellowship Program.\n",
      "\n",
      "Document 8: Document ID: 13 Score: 0.1 Rank: 8 Retriever: dense\n",
      "\n",
      "Content:\n",
      "2\n",
      "After two years of condensed reporting, we’re sharing a deeper dive into our approach in one place in our 2023 Environmental Report. In 2022, we continued to make measurable progress in many key ways, such as:\n",
      "• We enhanced and launched new sustainabilityproduct features , such as eco-friendly routing in\n",
      "Maps, which is estimated to have helped preventmore than 1.2 million metric tons of carbon emissionsfrom launch through 2022—equivalent to takingapproximately 250,000 fuel-based cars off the roadfor a year.\n",
      " 3\n",
      "• We expanded the availability of Google EarthEngine —which provides access to reliable, up-to-\n",
      "date insights on how our planet is changing—toinclude businesses and governments worldwide as anenterprise-grade service through Google Cloud.• We opened our new Bay View campus , which is\n",
      "all-electric, net water-positive, restores over 17 acresof high-value nature, and incorporates the leadingprinciples of circular design.\n",
      "\n",
      "Document 9: Document ID: 328 Score: 0.1 Rank: 9 Retriever: sparse\n",
      "\n",
      "Content:\n",
      "Sustainable \n",
      "consumption of \n",
      "public goods (e.g., \n",
      "“right to repair”)Google submitted comments to the European Commission’s public consultation regarding \n",
      "the promotion of repair and reuse of goods. We shared our views on the core principles to \n",
      "consider when introducing policy measures to promote repair and reuse horizontally, and for \n",
      "smartphones and tablets specifically.\n",
      "Body of European \n",
      "Regulators \n",
      "for Electronic \n",
      "Communications \n",
      "(BEREC)Google responded to a questionnaire  by BEREC in view of the development of key performance \n",
      "indicators to characterize the environmental impact of electronic communications, networks, \n",
      "devices, and services. We provided information about our environmental reporting practices \n",
      "and suggestions to help identify which indicators would provide relevant environmental \n",
      "information.\n",
      "Engagement with coalitions and sustainability initiatives\n",
      "RE-Source PlatformGoogle is a strategic partner and steering committee member of the RE-Source Platform, the\n",
      "\n",
      "Document 10: Document ID: 115 Score: 0.08333333333333333 Rank: 10 Retriever: dense\n",
      "\n",
      "Content:\n",
      "of over 140 partner organizations.\n",
      "The Google.org Impact Challenge on Climate Innovation supports breakthrough projects that use data and technology to \n",
      "accelerate climate action.\n",
      "The journey ahead\n",
      "From measuring and monitoring changes on the Earth’s surface, improving forecast and prediction models for flooding and wildfires, optimizing operations, combining disparate data sources, and designing more efficient products, we continue to leverage our expertise in technology and apply the latest advancements to help solve global challenges.\n",
      "We believe that by working together with our partners and \n",
      "customers, we can make a real difference in addressing the challenges of climate change and ecosystem degradation. LEARN MORE\n",
      "• Data Commons\n",
      "• Environmental Insights Explorer\n",
      "• Google Cloud sustainability\n",
      "• Google Earth Engine\n",
      "• Sustainability-focused accelerators   31\n",
      "2023 Environmental Report  Operating \n",
      "sustainably\n",
      "We’re showing the way forward \n",
      "through our own operationsOur ambition\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# User Query\n",
    "result = rag_chain_with_source.invoke(user_query)\n",
    "relevance_score = result['answer']['relevance_score']\n",
    "final_answer = result['answer']['final_answer']\n",
    "retrieved_docs = result['context']\n",
    "\n",
    "print(f\"\\nOriginal Question: {user_query}\\n\")\n",
    "print(f\"Relevance Score: {relevance_score}\\n\")\n",
    "print(f\"Final Answer:\\n{final_answer}\\n\\n\")\n",
    "\n",
    "print(\"Retrieved Documents:\")\n",
    "for i, doc in enumerate(retrieved_docs, start=1):\n",
    "    doc_id = doc.metadata['id']\n",
    "    doc_score = doc.metadata.get('score', 'N/A')\n",
    "    doc_rank = doc.metadata.get('rank', 'N/A')\n",
    "    doc_retriever = doc.metadata.get('retriever', 'N/A')\n",
    "    print(f\"Document {i}: Document ID: {doc_id} Score: {doc_score} Rank: {doc_rank} Retriever: {doc_retriever}\\n\")\n",
    "    print(f\"Content:\\n{doc.page_content}\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e820a44b-dfea-4650-9548-012a2a25fb9a",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "colab": {
   "name": "CHAPTER8-2_HYBRID_CUSTOM.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
